from lxml import etree 

# Sample HTML fragment used by the demo: a <div> wrapping a five-item list.
text = '''
<div>
    <ul>
         <li class="item-0">123</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-inactive"><a href="link3.html">third item</a></li>
         <li class="item-1"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
'''

# etree.HTML parses the string as an HTML document and automatically adds
# the missing wrapper tags such as <html> and <body>.
html = etree.HTML(text)
# NOTE: a file on disk can be parsed as well, but etree.parse('index.html')
# uses the XML parser by default; pass etree.HTMLParser() as the second
# argument to get the same HTML handling as etree.HTML above.

# The XPath query yields a list with every <li> Element; keep the first one.
result1 = html.xpath("//li")
element1 = result1[0]

# Read the element's text content, its "class" attribute and its tag name.
li_text, li_class, li_tag = element1.text, element1.attrib['class'], element1.tag

# Prints, one per line: 123 / item-0 / li
print(li_text, li_class, li_tag, sep='\n')

# First <a> that is a direct child of an <li>; show where it links to.
first_link = html.xpath('//li/a')[0]
print(first_link.attrib['href'])   # link2.html
